library(tidyverse)
library(haven)

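# Replication script. The sections appear in the order below; each one is
# introduced by a header comment carrying its name (search for the header
# rather than relying on line numbers, which shift whenever code is edited):
#   Figure 1
#   Figure 2
#   Table 1
#   Appendix Table A1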

# FIGURE 1
# Faceted bar chart of the first-round cross-tabs: one panel per candidate,
# x = likelihood value, y = share of respondents choosing that value.

cross_tabs_round1 <- read_csv("2017_cross_tabs_round1.csv")
# Keep only the first 11 rows (presumably the 0-10 likelihood scale; anything
# below them in the CSV is dropped -- confirm against the file).
cross_tabs_round1 <- cross_tabs_round1[1:11, ]

# Convert columns 2-10 (one per candidate) from counts to within-column
# shares, so that each candidate column sums to 1.
cross_tabs_round1[, 2:10] <- lapply(
  cross_tabs_round1[, 2:10],
  function(x) x / sum(x)
)

# Long format: one row per (value, candidate) pair.
round1_long <- cross_tabs_round1 %>%
  pivot_longer(-value, names_to = "key", values_to = "percentage")

# Average likelihood per candidate: values weighted by their share.
averages <- round1_long %>%
  group_by(key) %>%
  summarise(wgt_mean = weighted.mean(value, percentage))

# NOTE(review): the "avg = ..." numbers in the facet labels are hard-coded;
# `averages` holds the computed values they should be checked against.
fig1 <- round1_long %>%
  left_join(averages, "key") %>%
  mutate(key = factor(
    key,
    levels = c("Macron", "Mélenchon", "Le Pen", "Fillon",
               "Hamon", "D.-Aignant", "Poutou", "Arthaud", "Cheminade"),
    labels = c("Macron; avg = 6.27", "Mélenchon; avg = 4.65",
               "Le Pen; avg = 4.63", "Fillon; avg = 4.17",
               "Hamon; avg = 2.56", "D.-Aignant; avg = 1.12",
               "Poutou; avg = 0.71", "Arthaud; avg = 0.56",
               "Cheminade; avg = 0.55")
  )) %>%
  ggplot(aes(x = value, y = percentage)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~key) +
  theme_bw() +
  labs(x = "Likelihood forecast", y = "% of respondents") +
  # `percentage` is a 0-1 fraction, so scale the tick labels by 100 before
  # appending the percent sign (keeps the axis consistent with the bar labels).
  scale_y_continuous(
    labels = function(x) paste0(x * 100, " %"),
    limits = c(0, 1)
  ) +
  geom_text(aes(label = round(percentage * 100)),
            color = "black", vjust = -0.3, size = 3)

# Print explicitly so the figure is also drawn when the script is source()d.
print(fig1)

# Figure 2 

# write functions

# Mean absolute error.
#   x: numeric vector of errors (x = y - y.hat); NA entries are ignored.
# Returns a single number: the mean of |x| over the non-missing entries.
mae <- function(x) {
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  mean(abs(x), na.rm = TRUE)
}
# Root mean squared error.
#   x: numeric vector of errors (x = y - y.hat); NA entries are ignored.
# Returns a single number: sqrt of the mean of x^2 over non-missing entries.
rmse <- function(x) {
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  sqrt(mean(x^2, na.rm = TRUE))
}

# load data

# Prefer a copy of the results file in the working directory (where the other
# inputs of this script are read from); otherwise fall back to the original
# download location.
election_file <- "french-presidential-election-2017.csv"
if (!file.exists(election_file)) {
  election_file <- "~/Downloads/french-presidential-election-2017.csv"
}
data <- read.csv(election_file, header = TRUE)

# compute precise vote shares (votes / total votes, separately per round)

data$shr1 <- with(data, votes1 / sum(votes1, na.rm = TRUE))
data$shr2 <- with(data, votes2 / sum(votes2, na.rm = TRUE))

# compute forecasts: forecast_i = avg_i / sum_{j=1}^{n} avg_j

data$forecast1 <- with(data, avg1 / sum(avg1, na.rm = TRUE))
data$forecast2 <- with(data, avg2 / sum(avg2, na.rm = TRUE))

# compute error = actual - forecast

data$error1 <- with(data, shr1 - forecast1)
data$error2 <- with(data, shr2 - forecast2)

# compute mae and rmse (stored only; nothing below prints them)

mae1 <- mae(data$error1)
mae2 <- mae(data$error2)
rmse1 <- rmse(data$error1)
rmse2 <- rmse(data$error2)

# Scatter plot of forecasted vs. actual vote share. Both rounds are stacked
# into one set of vectors: all first-round rows, then all second-round rows.
par(mar = c(3, 3.5, 2, 1), mgp = c(2, .7, 0), tck = -.01, las = 1)
n_cand <- nrow(data)
j <- rep(c(1, 2), each = n_cand)                   # round of each point
# Label = candidate name with everything up to the first space removed,
# repeated once per round so it lines up with x, y and j.
l <- rep(sub(".*? ", "", data$candidate), times = 2)
x <- c(data$forecast1, data$forecast2)
y <- c(data$shr1, data$shr2)
r <- range(c(x, y), na.rm = TRUE)                  # shared limits for both axes
col <- rep(c("black", "darkgrey"), each = n_cand)
pch <- rep(c(16, 17), each = n_cand)
# Label position: 4 = right of the point by default, 2 = left of the point
# for the three points singled out below.
p <- rep(4, 2 * n_cand)
p[l == "Macron" & j == 2] <- 2
p[l == "Fillon" & j == 1] <- 2
p[l == "Arthaud" & j == 1] <- 2
# Abbreviate only after the positions above were set from the full name.
l[l == "Arthaud"] <- "Arth."
plot(x, y, xlim = r, ylim = r, xlab = "Forecasted vote share", ylab = "",
     type = "n")
grid()
points(x, y, pch = pch, col = col)
text(x, y, l, pos = p, cex = .75, col = col)
abline(a = 0, b = 1, lty = 2)                      # forecast == actual line
mtext(side = 2, line = 2.5, "Actual vote share", las = 3)
# `border` is omitted: legend() only uses it when `fill` is supplied.
legend("topleft", c("First round", "Second round"), bty = "n",
       text.col = c("black", "darkgrey"), pch = 16:17,
       col = c("black", "darkgrey"))

# Table 1
# Builds the per-candidate CF share from survey items io1_i1..io1_i9 and sets
# it next to the weighted vote intention (ivt1q2).

df <- read_rds("OpinionWay Probability survey November 2021.rds")

names_ <- paste0("io1_i", 1:9)

# Share of respondents per answer category for each item, rounded to three
# decimals (elements are named p1..p9, in the order of `names_`).
props <- lapply(names_, function(item) {
  prop.table(table(df[[item]])) %>% c() %>% round(3)
})
names(props) <- paste0("p", seq_along(props))

# The scores below pair the answer categories positionally with 0:10, so each
# item must have exactly 11 observed categories; fail loudly otherwise.
stopifnot(all(lengths(props) == 11))

# Unrounded mean answer per item (kept for reference; not used below).
item_means <- vapply(
  names_,
  function(item) as.numeric(mean(df[[item]], na.rm = TRUE)),
  numeric(1)
)

# Column names: the first parenthesised text in each item's "label" attribute.
col_names <- vapply(
  names_,
  function(item) str_extract(attr(df[[item]], "label"), "(?<=\\().+?(?=\\))"),
  character(1),
  USE.NAMES = FALSE
)
stopifnot(!anyNA(col_names))

# Rows 1-11: answer shares per item. Last row: the shares weighted by 0:10,
# i.e. the mean answer implied by the rounded shares.
my_plot <- bind_rows(
  as_tibble(props),
  as_tibble(lapply(props, function(p) sum(p * (0:10))))
)

names(my_plot) <- col_names

# Remove Barnier & Bertrand
my_plot <- my_plot %>% select(-`Xavier Bertrand`, -`Michel Barnier`)

# Weighted scores of the remaining candidates (the last row of my_plot).
v <- unname(unlist(my_plot[nrow(my_plot), ]))

# CF: each candidate's score as a share of the total score.
CF <- tibble(CF = v / sum(v), Leader = names(my_plot))
# Respell two names -- presumably to match the ivt1q2 value labels that serve
# as the join key below (confirm against the data).
CF$Leader[CF$Leader == "Eric Zemmour"] <- "Éric Zemmour"
CF$Leader[CF$Leader == "Jean Luc Mélenchon"] <- "Jean-Luc Mélenchon"

# Vote Intentions
# Weighted share of each vote intention, excluding blank/null votes and
# abstention, joined with the CF shares and sorted by CF (descending).
table1 <- df %>%
  select(ivt1q2, Weight) %>%
  mutate(intention = as_factor(ivt1q2)) %>%
  filter(complete.cases(.)) %>%
  group_by(intention) %>%
  summarise(sum_w = sum(Weight)) %>%
  filter(
    intention != "Vous voteriez blanc ou nul",
    intention != "Vous vous abstiendrez"
  ) %>%
  ungroup() %>%
  mutate(vote_int = sum_w / sum(sum_w)) %>%
  rename(Leader = intention) %>%
  select(-sum_w) %>%
  left_join(CF, by = "Leader") %>%
  mutate(CF = round(CF, 3)) %>%
  arrange(desc(CF)) %>%
  select(Leader, CF, vote_int)
# Print explicitly so the table is also shown when the script is source()d.
print(table1)

# removing no intention (Blanc, abstention) (98,99)
print(sum(df$ivt1q2 < 98, na.rm = TRUE))

# avg n CF
# Mean number of non-missing answers over columns 2-10 of df.
# NOTE(review): selected by position; presumably these are the nine io1_i*
# items listed in `names_` -- confirm.
print(mean(vapply(df[, 2:10], function(x) sum(!is.na(x)), integer(1))))

# Appendix Table A1
# Second-round cross-tabs: column totals, within-column shares and the
# share-weighted average value per column.

cross_tabs_round2 <- read_csv("2017_cross_tabs_round2.csv")

# Keep only the first 11 rows of the CSV.
cross_tabs_round2 <- cross_tabs_round2[1:11, ]

# Column totals of columns 2-3 before they are normalised.
print(lapply(cross_tabs_round2[, 2:3], sum))

# Convert columns 2-3 to within-column shares (each sums to 1).
cross_tabs_round2[, 2:3] <- lapply(
  cross_tabs_round2[, 2:3],
  function(x) x / sum(x)
)

# Average value per column, weighting each value by its share.
# Note: this reuses (overwrites) the name `averages` from the Figure 1 section.
averages <- cross_tabs_round2 %>%
  pivot_longer(-value, names_to = "key", values_to = "percentage") %>%
  group_by(key) %>%
  summarise(wgt_mean = weighted.mean(value, percentage))

# Print explicitly so the results are also shown when the script is source()d.
print(cross_tabs_round2)
print(averages)

